test/graphemetest
test/printproperty
test/charwidth
+test/misc
test/valid
test/iterate
test/case
# Be sure to also update these in Makefile and MANIFEST!
set(SO_MAJOR 2)
set(SO_MINOR 1)
-set(SO_PATCH 0)
+set(SO_PATCH 1)
add_definitions (
-DUTF8PROC_EXPORTS
include/utf8proc.h
lib/
lib/libutf8proc.a
-lib/libutf8proc.so -> libutf8proc.so.2.1.0
-lib/libutf8proc.so.2 -> libutf8proc.so.2.1.0
-lib/libutf8proc.so.2.1.0
+lib/libutf8proc.so -> libutf8proc.so.2.1.1
+lib/libutf8proc.so.2 -> libutf8proc.so.2.1.1
+lib/libutf8proc.so.2.1.1
# libutf8proc Makefile
# programs
-MAKE=make
AR?=ar
CC?=gcc
INSTALL=install
# Be sure to also update these ABI versions in MANIFEST and CMakeLists.txt!
MAJOR=2
MINOR=1
-PATCH=0
+PATCH=1
OS := $(shell uname)
ifeq ($(OS),Darwin) # MacOS X
ifneq ($(OS),Darwin)
rm -f libutf8proc.so.$(MAJOR)
endif
- rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case test/custom
+ rm -f test/tests.o test/normtest test/graphemetest test/printproperty test/charwidth test/valid test/iterate test/case test/custom test/misc
rm -rf MANIFEST.new tmp
$(MAKE) -C bench clean
$(MAKE) -C data clean
ln -f -s libutf8proc.so.$(MAJOR).$(MINOR).$(PATCH) $@.$(MAJOR)
libutf8proc.$(MAJOR).dylib: utf8proc.o
- $(CC) -dynamiclib -o $@ $^ -install_name $(libdir)/$@ -Wl,-compatibility_version -Wl,$(MAJOR) -Wl,-current_version -Wl,$(MAJOR).$(MINOR).$(PATCH)
+ $(CC) $(LDFLAGS) -dynamiclib -o $@ $^ -install_name $(libdir)/$@ -Wl,-compatibility_version -Wl,$(MAJOR) -Wl,-current_version -Wl,$(MAJOR).$(MINOR).$(PATCH)
libutf8proc.dylib: libutf8proc.$(MAJOR).dylib
ln -f -s libutf8proc.$(MAJOR).dylib $@
test/custom: test/custom.c test/tests.o utf8proc.o utf8proc.h test/tests.h
$(CC) $(UCFLAGS) test/custom.c test/tests.o utf8proc.o -o $@
-check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/custom test/charwidth test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
+test/misc: test/misc.c test/tests.o utf8proc.o utf8proc.h test/tests.h # misc/regression test program; headers are listed only to trigger rebuilds
+ $(CC) $(UCFLAGS) test/misc.c test/tests.o utf8proc.o -o $@
+
+check: test/normtest data/NormalizationTest.txt test/graphemetest data/GraphemeBreakTest.txt test/printproperty test/case test/custom test/charwidth test/misc test/valid test/iterate bench/bench.c bench/util.c bench/util.h utf8proc.o
$(MAKE) -C bench
test/normtest data/NormalizationTest.txt
test/graphemetest data/GraphemeBreakTest.txt
test/charwidth
+ test/misc
test/valid
test/iterate
test/case
# utf8proc release history #
+## Version 2.1.1 ##
+
+2018-04-27
+
+- Fixed composition bug ([#128]).
+
+- Minor build fixes ([#94], [#99], [#113], [#125]).
+
## Version 2.1 ##
-2016-12-16:
+2016-12-26:
- New functions `utf8proc_map_custom` and `utf8proc_decompose_custom`
to allow user-supplied transformations of codepoints, in conjunction
[#78]: https://github.com/JuliaLang/utf8proc/issues/78
[#79]: https://github.com/JuliaLang/utf8proc/issues/79
[#80]: https://github.com/JuliaLang/utf8proc/issues/80
-[#84]: https://github.com/JuliaLang/utf8proc/pull/84
-[#88]: https://github.com/JuliaLang/utf8proc/pull/88
-[#89]: https://github.com/JuliaLang/utf8proc/pull/89
+[#84]: https://github.com/JuliaLang/utf8proc/issues/84
+[#88]: https://github.com/JuliaLang/utf8proc/issues/88
+[#89]: https://github.com/JuliaLang/utf8proc/issues/89
[#90]: https://github.com/JuliaLang/utf8proc/issues/90
+[#94]: https://github.com/JuliaLang/utf8proc/issues/94
+[#99]: https://github.com/JuliaLang/utf8proc/issues/99
+[#113]: https://github.com/JuliaLang/utf8proc/issues/113
+[#125]: https://github.com/JuliaLang/utf8proc/issues/125
+[#128]: https://github.com/JuliaLang/utf8proc/issues/128
UNICODE_VERSION=9.0.0
UnicodeData.txt:
- $(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
+ $(CURL) $(CURLFLAGS) -o $@ -O http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/UnicodeData.txt
EastAsianWidth.txt:
$(CURL) $(CURLFLAGS) -o $@ -O $(URLCACHE)http://www.unicode.org/Public/$(UNICODE_VERSION)/ucd/EastAsianWidth.txt
end
end
-$stdout << "const utf8proc_uint16_t utf8proc_sequences[] = {\n "
+$stdout << "static const utf8proc_uint16_t utf8proc_sequences[] = {\n "
i = 0
$int_array.each do |entry|
i += 1
end
$stdout << "};\n\n"
-$stdout << "const utf8proc_uint16_t utf8proc_stage1table[] = {\n "
+$stdout << "static const utf8proc_uint16_t utf8proc_stage1table[] = {\n "
i = 0
stage1.each do |entry|
i += 1
end
$stdout << "};\n\n"
-$stdout << "const utf8proc_uint16_t utf8proc_stage2table[] = {\n "
+$stdout << "static const utf8proc_uint16_t utf8proc_stage2table[] = {\n "
i = 0
stage2.flatten.each do |entry|
i += 1
end
$stdout << "};\n\n"
-$stdout << "const utf8proc_property_t utf8proc_properties[] = {\n"
+$stdout << "static const utf8proc_property_t utf8proc_properties[] = {\n"
$stdout << " {0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 0, 0, UTF8PROC_BOUNDCLASS_OTHER},\n"
properties.each { |line|
$stdout << line
-$stdout << "const utf8proc_uint16_t utf8proc_combinations[] = {\n "
+$stdout << "static const utf8proc_uint16_t utf8proc_combinations[] = {\n "
i = 0
comb1st_indicies.keys.each_index do |a|
offset = 0
--- /dev/null
+/* Miscellaneous tests, e.g. regression tests */
+
+#include "tests.h"
+
+/*
+ * Regression test for issue #128: normalizing "r\u0307\u0323" (r, dot above,
+ * dot below) must give "\u1E5B\u0307" under NFC and "r\u0323\u0307" under NFD.
+ * Each result is verified to be non-NULL, 5 bytes long, and byte-identical
+ * (including the terminating NUL) to the expected encoding.
+ */
+static void issue128(void) /* #128 */
+{
+    utf8proc_uint8_t input[] = {0x72, 0xcc, 0x87, 0xcc, 0xa3, 0x00}; /* "r\u0307\u0323" */
+    utf8proc_uint8_t nfc[] = {0xe1, 0xb9, 0x9b, 0xcc, 0x87, 0x00}; /* "\u1E5B\u0307" */
+    utf8proc_uint8_t nfd[] = {0x72, 0xcc, 0xa3, 0xcc, 0x87, 0x00}; /* "r\u0323\u0307" */
+    utf8proc_uint8_t *nfc_out, *nfd_out;
+
+    nfc_out = utf8proc_NFC(input);
+    /* Guard first: a NULL result (normalization failure) must be reported as
+       a test failure rather than being handed to printf("%s")/strlen/memcmp. */
+    check(nfc_out != NULL, "utf8proc_NFC returned NULL");
+    printf("NFC \"%s\" -> \"%s\" vs. \"%s\"\n", (char*)input, (char*)nfc_out, (char*)nfc);
+    check(strlen((char*) nfc_out) == 5, "incorrect nfc length");
+    /* 6 bytes = 5 data bytes + NUL; safe because the length was just checked. */
+    check(!memcmp(nfc, nfc_out, 6), "incorrect nfc data");
+
+    nfd_out = utf8proc_NFD(input);
+    check(nfd_out != NULL, "utf8proc_NFD returned NULL");
+    printf("NFD \"%s\" -> \"%s\" vs. \"%s\"\n", (char*)input, (char*)nfd_out, (char*)nfd);
+    check(strlen((char*) nfd_out) == 5, "incorrect nfd length");
+    check(!memcmp(nfd, nfd_out, 6), "incorrect nfd data");
+
+    /* Both results are released by the caller with free(). */
+    free(nfd_out); free(nfc_out);
+}
+
+int main(void) /* entry point: runs each regression test defined in this file */
+{
+ issue128();
+ printf("Misc tests SUCCEEDED.\n"); /* NOTE(review): presumably only reached when every check() passed -- assumes check() aborts on failure; confirm in tests.h */
+ return 0;
+}
static utf8proc_bool grapheme_break_extended(int lbc, int tbc, utf8proc_int32_t *state)
{
- int lbc_override = lbc;
- if (state && *state != UTF8PROC_BOUNDCLASS_START)
- lbc_override = *state;
+ int lbc_override = ((state && *state != UTF8PROC_BOUNDCLASS_START)
+ ? *state : lbc);
utf8proc_bool break_permitted = grapheme_break_simple(lbc_override, tbc);
if (state) {
// Special support for GB 12/13 made possible by GB999. After two RI
current_property->comb_index != UINT16_MAX &&
current_property->comb_index >= 0x8000) {
int sidx = starter_property->comb_index;
- int idx = (current_property->comb_index & 0x3FFF) - utf8proc_combinations[sidx];
- if (idx >= 0 && idx <= utf8proc_combinations[sidx + 1] ) {
- idx += sidx + 2;
+ int idx = current_property->comb_index & 0x3FFF;
+ if (idx >= utf8proc_combinations[sidx] && idx <= utf8proc_combinations[sidx + 1] ) {
+ idx += sidx + 2 - utf8proc_combinations[sidx];
if (current_property->comb_index & 0x4000) {
composition = (utf8proc_combinations[idx] << 16) | utf8proc_combinations[idx+1];
} else
* utf8proc is a free/open-source (MIT/expat licensed) C library
* providing Unicode normalization, case-folding, and other operations
* for strings in the UTF-8 encoding, supporting Unicode version
- * 8.0.0. See the utf8proc home page (http://julialang.org/utf8proc/)
+ * 9.0.0. See the utf8proc home page (http://julialang.org/utf8proc/)
* for downloads and other information, or the source code on github
* (https://github.com/JuliaLang/utf8proc).
*
/** The MINOR version number (increased when new functionality is added in a backwards-compatible manner). */
#define UTF8PROC_VERSION_MINOR 1
/** The PATCH version (increased for fixes that do not change the API). */
-#define UTF8PROC_VERSION_PATCH 0
+#define UTF8PROC_VERSION_PATCH 1
/** @} */
#include <stdlib.h>
-#include <sys/types.h>
+
#if defined(_MSC_VER) && _MSC_VER < 1800
// MSVC prior to 2013 lacked stdbool.h and inttypes.h
typedef signed char utf8proc_int8_t;
-const utf8proc_uint16_t utf8proc_sequences[] = {
+static const utf8proc_uint16_t utf8proc_sequences[] = {
97, 98, 99, 100, 101, 102, 103,
104, 105, 106, 107, 108, 109, 110, 111,
112, 113, 114, 115, 116, 117, 118, 119,
56603, 55354, 56604, 55354, 56605, 55354, 56606, 55354,
56607, 55354, 56608, 55354, 56609, };
-const utf8proc_uint16_t utf8proc_stage1table[] = {
+static const utf8proc_uint16_t utf8proc_stage1table[] = {
0, 256, 512, 768, 1024, 1280, 1536,
1792, 2048, 2304, 2560, 2816, 3072, 3328, 3584,
3840, 4096, 4352, 4608, 4864, 5120, 5376, 5632,
18432, 18432, 18432, 18432, 18432, 18432, 18432, 18432,
38656, };
-const utf8proc_uint16_t utf8proc_stage2table[] = {
+static const utf8proc_uint16_t utf8proc_stage2table[] = {
1, 2, 2, 2, 2, 2, 2,
2, 2, 3, 4, 3, 5, 6, 2,
2, 2, 2, 2, 2, 2, 2, 2,
3984, 3984, 3984, 3984, 3984, 3984, 3984, 0,
0, };
-const utf8proc_property_t utf8proc_properties[] = {
+static const utf8proc_property_t utf8proc_properties[] = {
{0, 0, 0, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false,false,false,false, 0, 0, UTF8PROC_BOUNDCLASS_OTHER},
{UTF8PROC_CATEGORY_CC, 0, UTF8PROC_BIDI_CLASS_BN, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, true, false, true, 0, 0, UTF8PROC_BOUNDCLASS_CONTROL},
{UTF8PROC_CATEGORY_CC, 0, UTF8PROC_BIDI_CLASS_BN, 0, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, true, 0, 0, UTF8PROC_BOUNDCLASS_CONTROL},
{UTF8PROC_CATEGORY_LO, 0, UTF8PROC_BIDI_CLASS_L, 0, 7975, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, UINT16_MAX, false, false, false, false, 2, 0, UTF8PROC_BOUNDCLASS_OTHER},
};
-const utf8proc_uint16_t utf8proc_combinations[] = {
+static const utf8proc_uint16_t utf8proc_combinations[] = {
0, 46, 192, 193, 194, 195, 196, 197, 0,
256, 258, 260, 550, 461, 0, 0, 512,
514, 0, 0, 0, 0, 0, 0, 0,